!pip install arviz==0.6.1
!pip install pymc3==3.8
!pip install Theano==1.0.4
%matplotlib inline
import numpy as np
from IPython.display import display, Markdown
import matplotlib.pyplot as plt
import matplotlib
import pandas as pd
import seaborn as sns
import plotly.express as px
#import load_covid_data
sns.set_context('talk')
plt.style.use('seaborn-whitegrid')
import warnings
warnings.filterwarnings("ignore")
def load_individual_US_timeseries(name, delCols=[]):
base_url='https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series'
url = f'{base_url}/time_series_covid19_{name}.csv'
df = pd.read_csv(url,
index_col=['Country_Region', 'Admin2',
'Province_State', 'Lat', 'Long_', ])
if (len(delCols) > 0):
extraCols = ['UID', 'iso2', 'iso3', 'code3', 'FIPS', 'Combined_Key'] + delCols
else:
extraCols = ['UID', 'iso2', 'iso3', 'code3', 'FIPS', 'Combined_Key']
# Admin2 == 'City/Region' for US
#
df.drop(extraCols, axis=1, inplace=True)
df['type'] = name.lower()
df.columns.name = 'date'
extraCols = ['Lat', 'Long_']
df = (df.set_index('type', append=True)
.reset_index(extraCols, drop=True)
.stack()
.reset_index()
.set_index('date')
)
df.index = pd.to_datetime(df.index)
df.columns = ['country', 'admin2','state', 'type', 'cases']
return df
def add_confirmed_after_n_days_column(df, n_days=100, relevant_threshold=100):
affected_states = df[df.cases > relevant_threshold].state.unique()
query = (df.cases >= n_days)
cols = ["state", "date"]
date_since_n_lookup = dict(
(df[query][cols].groupby("state").min().reset_index()).values
)
first_confirmed_date = f"first_{n_days}_confirmed_date"
days_since = f"days_since_{n_days}"
df[first_confirmed_date] = df.state.apply(lambda x: date_since_n_lookup.get(x))
df[days_since] = (df.date - df[first_confirmed_date]).dt.days
df = df.drop(columns=[first_confirmed_date])
df = df.set_index("date")
return df
## Data in Long format
cases_df = load_individual_US_timeseries('confirmed_US')
cases_df = add_confirmed_after_n_days_column(cases_df.reset_index())
cityCases_df = cases_df.groupby(['state', 'admin2', 'date', 'type']).sum().reset_index()
cityCases_df = add_confirmed_after_n_days_column(cityCases_df, 100)
stateCases_df = cityCases_df.groupby(['state', 'date', 'type']).sum().reset_index()
stateCases_df = add_confirmed_after_n_days_column(stateCases_df, 100)
## Data in Wide format
confirmed_cases_us_url = 'https://github.com/CSSEGISandData/COVID-19/blob/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv?raw=true'
confirmed_cases_url = confirmed_cases_us_url
confirmed_cases = pd.read_csv(confirmed_cases_url, sep=',')
cases_df.head()
confirmed_cases.head()
plotlyChart = True
#plotlyChart = False
def plotStateGeo(stateName, title="Infections by Date", plotly=True):
dateCols = confirmed_cases.columns.str.contains('/20')
colList = list(confirmed_cases.columns[dateCols])
look_df = confirmed_cases.loc[confirmed_cases["Province_State"]== stateName,
dateCols].transpose()
firstDateState = look_df.loc[look_df.sum(axis=1) > 0, ].head(1).index[0]
slice_df = confirmed_cases.loc[confirmed_cases["Province_State"]== stateName,
['Admin2'] + colList[colList.index(firstDateState):]].copy()
if (plotly):
chart_df = pd.melt(slice_df, id_vars=['Admin2'],
var_name='date', value_name='cases')
chart_df.columns = ['area', 'date', 'cases']
chart_df['state'] = stateName
fig = px.area(chart_df, x="date", y="cases", color="area",
line_group="state",
labels={
"cases": "Confirmed Cases",
"date": "",
},
title=title)
fig.update_layout(showlegend=True)
fig.show()
else:
lol = slice_df[colList[colList.index(firstDateState):]].values.tolist()
with warnings.catch_warnings():
fig = plt.figure(figsize=(20,10))
fig.subplots_adjust(top=0.8)
ax1 = fig.add_subplot(111)
ax1.set_ylabel('Confirmed Cases')
ax1.set_xlabel('')
ax1.set_title(title)
x = range(len(lol[0]))
ax1.stackplot(x, lol)
for stateName in ['New York', 'Illinois', 'New Jersey']:
plotStateGeo(stateName, stateName+"- Infections by Initial Detection Date", plotlyChart )
def plotAreas(df, plotly, titleString):
if (plotly):
fig = px.scatter(df, x="lastN", y="change", log_x=False, size='size',
color='area', hover_name="area", hover_data=["size"])
fig.update_layout(
title=titleString,
xaxis_title="Time (days elapsed)",
yaxis_title="R (day over day slope)",
)
#fig.update_xaxes(autorange="reversed")
fig.show()
else:
ax = sns.relplot(y="change", x="lastN", hue='area', size="size",
sizes=(40, 400), alpha=.5, palette="muted",
height=6, data=df)
ax.fig.set_size_inches(20,10)
ax.set(xlabel="Time (days elapsed)", ylabel="R (day over day slope)",
title=titleString)
#plt.gca().invert_xaxis()
ax._legend.remove()
plt.legend(ncol=3, prop={'size': 8})
plt.show()
def plotStates(df, plotly, titleString):
if (plotly):
fig = px.scatter(df, x="lastN", y="change", log_x=False, size='size',
color='state', hover_name="state", hover_data=["size"])
fig.update_layout(
title=titleString,
xaxis_title="Time (days elapsed)",
yaxis_title="R (day over day slope)",
)
#fig.update_xaxes(autorange="reversed")
fig.show()
else:
ax = sns.relplot(y="change", x="lastN", hue='state', size="size",
sizes=(40, 400), alpha=.5, palette="muted",
height=6, data=df)
ax.fig.set_size_inches(20,10)
ax.set(xlabel="Time (days elapsed)", ylabel="R (day over day slope)",
title=titleString)
ax._legend.remove()
plt.legend(ncol=3, prop={'size': 8})
#plt.gca().invert_xaxis()
plt.show()
Use slope of form y = mX + B (polynomial degree = 1)
Every week we calculate the slope for a window of last 'n' days.
stateName = 'Florida'
stateLevel_df = cityCases_df.loc[cityCases_df['state'] == stateName,].copy()
lol = []
for s in stateLevel_df['admin2'].unique():
for week in np.arange(2,20):
days = week * 7
slice_df = stateLevel_df.loc[stateLevel_df['admin2'] == s,].copy()
slice_df = slice_df[slice_df["cases"] >= 100]
since100 = len(slice_df.index)
slice_df = slice_df.tail(days)
l = len(slice_df.index)
if ((l == 0) | (l >= since100)):
continue
y = slice_df.head(14)['cases']
lastY = y[0] # cases seen at start of period
#print (s, days, len(y), lastY,
# slice_df.head(1).index.values[0],
# slice_df.tail(1).index.values[0])
x = np.arange(0, len(y))
polynomialDegree = 1
res = np.polyfit(x, np.log(y), polynomialDegree)
# For polynomial degree 1:
# y = res[0] * X + res[1]
powerx = str(1+np.round(res[0],4))
lol.append([s, since100, 1+np.round(res[0],4), lastY, days])
flat_df = pd.DataFrame(lol, columns=['area', 'since100','change', 'size', 'lastN'])
flat_df = flat_df.fillna(0)
flat_df = flat_df.sort_values(by=['change'], ascending=False)
flat_df = flat_df[flat_df['change'] > 0]
flat_df = flat_df.sort_values(by=['area','lastN'], ascending=[False, False])
state_area_df = flat_df.copy()
state_area_df['lastN'] = -state_area_df['lastN']
state_area_df.head()
plotAreas (state_area_df,
plotlyChart, "Covid19 Transmission Rates within "+ stateName)
lol = []
for s in stateCases_df['state'].unique():
for week in np.arange(2,20):
days = week * 7
slice_df = stateCases_df.loc[stateCases_df['state'] == s,].copy()
slice_df = slice_df[slice_df["cases"] >= 100]
since100 = len(slice_df.index)
slice_df = slice_df.tail(days).copy()
l = len(slice_df.index)
if ((l == 0) | (l >= since100)):
continue
y = slice_df.head(14)['cases']
lastY = y[0] # cases seen at start of period
x = np.arange(0, len(y))
polynomialDegree = 1
res = np.polyfit(x, np.log(y), polynomialDegree)
# For polynomial degree 1:
# y = res[0] * X + res[1]
powerx = str(1+np.round(res[0],4))
lol.append([s, since100, 1+np.round(res[0],4), lastY, days])
flat_df = pd.DataFrame(lol, columns=['state', 'since100','change', 'size', 'lastN'])
flat_df = flat_df.fillna(0)
flat_df = flat_df.sort_values(by=['change'], ascending=False)
flat_df = flat_df[flat_df['change'] > 0]
flat_df = flat_df.sort_values(by=['state','lastN'], ascending=[False, False])
states_all_df = flat_df.copy()
states_all_df['lastN'] = -states_all_df['lastN']
#states_all_df.loc[states_all_df['state'] == 'Florida',].head(20)
stateList = ['New York', 'Illinois', 'New Jersey']
plotStates (states_all_df.loc[states_all_df['state'].isin(stateList),],
plotlyChart, "States with Early Cases of Covid19")
def measureChange(stateName, lastN=30):
l = (stateCases_df['state'] == stateName) & (stateCases_df['cases'] > 0)
return (stateCases_df.loc[l,'cases'].pct_change()[-lastN:].sum())
d = {"states": pd.Series(stateCases_df['state'].unique())}
ranked_df = pd.DataFrame(d)
nDays = 14
ranked_df['rate'] = ranked_df.apply(lambda row :
measureChange(row['states'], nDays), axis=1)
ranked_df = ranked_df.sort_values(by='rate')
ranked_df.head()
stateList = ranked_df.head(10)['states']
stateCount = len(stateList)
plotStates (states_all_df.loc[states_all_df['state'].isin(stateList),],
plotlyChart, "Top "+ str(stateCount)
+" States with Declining/Low Rates of Covid19 (last "+str(nDays)+" days)")
topState = ranked_df.head(1).iloc[0]['states']
plotStateGeo(topState, topState+' - is Least Infection Change % by State (last '+str(nDays)+" days)" , plotlyChart)
stateList = ranked_df.tail(10)['states']
plotStates (states_all_df.loc[states_all_df['state'].isin(stateList),],
plotlyChart, "Top "+ str(stateCount)
+" States with Rising/High Rates of Covid19 (last "+str(nDays)+" days)")
topState = ranked_df.tail(1).iloc[0]['states']
plotStateGeo(topState, topState+' - is Most Infection Change % by State (last '+str(nDays)+" days)", plotlyChart)
stateList = ['Arkansas', 'Iowa', 'Nebraska', 'North Dakota',
'South Dakota', 'Utah', 'Wyoming']
stateCount = len(stateList)
plotStates (states_all_df.loc[states_all_df['state'].isin(stateList),],
plotlyChart, "The "+ str(stateCount)
+" States with No Lock-down Policy")
stateList = ['Arizona', 'Florida', 'Texas', 'California']
for stateName in stateList:
plotStateGeo(stateName, stateName+"- Infections by Initial Detection Date",
plotlyChart )
plotStates (states_all_df.loc[states_all_df['state'].isin(stateList),],
plotlyChart, "The Large Increasing States: "+ ",".join(stateList)
+" combined Chart")
stateList = ['New York', 'New Jersey', 'Illinois', 'Massachusetts']
plotStates (states_all_df.loc[states_all_df['state'].isin(stateList),],
plotlyChart, "The Large Decreasing States: "+ ",".join(stateList)
+" combined Chart")
dateCols = confirmed_cases.columns.str.contains('/20')
colList = ['Admin2', 'Province_State'] + list(confirmed_cases.columns[dateCols])
l = confirmed_cases['Admin2'].notnull() & (confirmed_cases['1/30/20'] != 0)
confirmed_cases.loc[l, colList].head()
N = 5
y_func = [float(x)**(1.36) for x in range(0,N)]
np.random.seed(1729)
x = np.arange(0,N)
print (x.size)
y_noise = 0.3 * np.random.normal(size=x.size)
y = y_func + y_noise
polynomialDegree = 2
trend = np.polyfit(x,y,polynomialDegree)
plt.plot(x,y,'o')
print ('Slope:', trend[0])
print ('Intercept:', trend[1])
trendpoly = np.poly1d(trend)
plt.plot(x,trendpoly(x))
plt.title('Fit polynomial of degree:'+str(polynomialDegree))
plt.show()